*****************************************************************************************

* This file processes the merged data set into the form used for analysis

*****************************************************************************************

/* steps in this file

1. create outcome variables and generate data flags
2. create full sample and its treatment variables (including some alternative treatments)
3. create matched sample and its treatment variables

*/


** set directory

********************************************************************************
* construct outcome variables and apply sample restrictions

{

* load the merged permits master file
* (-use- accepts the -clear- option; -replace- is not a valid option for -use-)
use "1980-2018_permits_master_SR.dta", clear

* make population variable for every year: each observation takes the
* census population variable (pop80, pop90, pop00, pop10) of its own decade
gen pop=.
gen decade=floor(year/10)*10
local year_list 80 90 00 10
foreach year in `year_list'{

	* the two-digit suffix is tried with both century prefixes (19xx and 20xx)
	replace pop = pop`year' if decade==19`year' | decade==20`year'
	
}	

* shorten the names of the reported building-size columns and total them
rename Units_*unit_reported units_*
egen total_units=rowtotal(units_*)

* totals over the Units_ columns that were not renamed above
egen imputed_total_units=rowtotal(Units_*)
gen imputed_lessSingle_units=imputed_total_units-Units_1unit_census

* log(x+1) and inverse hyperbolic sine transformations of unit counts
* (the two lists are parallel: output suffix and the source variable)
local out_suffix all 5plus single
local src_vars total_units units_5 units_1

forvalues k = 1/3 {
	local sfx : word `k' of `out_suffix'
	local src : word `k' of `src_vars'
	gen log_`sfx'=log(`src'+1)
}

forvalues k = 1/3 {
	local sfx : word `k' of `out_suffix'
	local src : word `k' of `src_vars'
	gen ihs_`sfx'=asinh(`src')
}

* same transformations for all units other than single-unit
gen units_lessSingle=total_units-units_1
gen log_lessSingle=log(units_lessSingle+1)
gen ihs_lessSingle=asinh(units_lessSingle)

* unit counts per 1000 residents, using the pop80 variable as the base
forvalues k = 1/3 {
	local sfx : word `k' of `out_suffix'
	local src : word `k' of `src_vars'
	gen pc_`sfx'=`src'/pop80*1000
}
gen pc_lessSingle=(total_units-units_1)/pop80*1000

* lihtc outcomes: transformations of li_units and n_units
local lihtc_suffix li total
local lihtc_vars li_units n_units

forvalues k = 1/2 {
	local sfx : word `k' of `lihtc_suffix'
	local src : word `k' of `lihtc_vars'
	gen log_`sfx'=log(`src'+1)
	gen ihs_`sfx'=asinh(`src')
}

* share of li_units in n_units, an indicator for any li_units in the row,
* and an id-level indicator that is 1 if any row of that id has li_units
gen li_ratio=li_units/n_units
gen li_indicator=(li_units !=. & li_units>0)
bysort id: egen lihtc_sample=max(li_indicator)

* quality control variables for various issues 

* flag years where units permitted don't match 
* (total built from the renamed reported columns versus total built from
* the remaining Units_ columns, overall and excluding single-unit)
gen match_units_flag=(imputed_total_units!=total_units)
gen match_multi_units_flag=(imputed_lessSingle_units!=units_lessSingle)

* flag mismatched gov types that don't report a change
* don't throw out mixed versus ward switchers
sort id year
* declare the panel before the L5. lag is used, so the lag does not depend
* on the input file having been saved with tsset settings
tsset id year
drop previous_gov
* NOTE(review): the recode below assumes encode assigns codes 1/2/3 to
* At-large/Mixed/Ward (alphabetical order of those three values) - confirm
encode gov_type, gen(num_gov_type)
* pool code 3 into code 2 so that a 2-versus-3 difference is not a switch
replace num_gov_type=2 if num_gov_type==3
gen lag5_num_gov_type=L5.num_gov_type

* code 1 five periods ago and code 2 now, with no reported AL_to_W / AL_to_Wor
gen loose_al_w=(num_gov_type ==2 &  lag5_num_gov_type==1)
replace loose_al_w = 0 if AL_to_W != . | AL_to_Wor !=. 

* code 2 five periods ago and code 1 now, with no reported W_to_AL
* (the original reset loose_al_w here; the variable just created is the one
* that has to be zeroed when the change was reported)
* NOTE(review): loose_w_al_year feeds reverse_treatment_year and
* loose_treatment_year later in this file, so those now exclude rows with a
* reported W_to_AL - confirm this is the intended sample
gen loose_w_al=(num_gov_type ==1 &  lag5_num_gov_type==2)
replace loose_w_al = 0 if W_to_AL != .  

* id-level year of the latest flagged row in each direction (missing if none)
gen pre_loose_al_w_year=loose_al_w*year
bys id: egen loose_al_w_year=max(pre_loose_al_w_year)
replace loose_al_w_year=. if loose_al_w_year==0

gen pre_loose_w_al_year=loose_w_al*year
bys id: egen loose_w_al_year=max(pre_loose_w_al_year)
replace loose_w_al_year=. if loose_w_al_year==0

gen misreport_gov_flag=(loose_w_al_year!=. | loose_al_w_year!=.)
* full variable names, so the drop does not rely on variable abbreviation
drop lag5_num_gov_type num_gov_type pre_loose*

* flag places that report w to al
gen report_reverse=W_to_AL!=.
bys id: egen report_reverse_flag=max(report_reverse)
drop report_reverse

* flag places below 2500 population threshold
gen pop_flag=(pop80<2500)

* combine flags into one variable
* (the wildcard picks up every variable whose name ends in flag)
egen any_flag=rowmax(*flag)
replace any_flag=1 if any_flag>1
bys id: egen any_flag_ever=max(any_flag)

* fill in gov types for unsurveyed years
tsset id year

* numeric government type: 1 At-large, 2 Mixed, 3 Ward, missing otherwise
gen current_gov=cond(gov_type=="At-large", 1, cond(gov_type=="Mixed", 2, cond(gov_type=="Ward", 3, .)))

* fill missing values from rows 1 to 20 periods away; at each distance the
* lead is tried first and the lag second
forvalues gap = 1/20 {

	replace current_gov = F`gap'.current_gov if missing(current_gov)
	replace current_gov = L`gap'.current_gov if missing(current_gov)

}

* create variable for towns that are always at large:
* a row counts as at-large when current_gov is 1 or still missing, unless
* members_W is positive; never_ward is 1 only if every row of the id qualifies
gen al_town=(current_gov==. | current_gov==1) 
replace al_town=0 if members_W!=. & members_W>0
bysort id: egen never_ward=min(al_town)

* merge in CBSAs identifier

* string versions of the state and county codes
tostring statefips, gen(state_fips)
tostring countyfips, gen(county_fips)

* left-pad with zeros: one-character state codes to two characters,
* one- and two-character county codes to three characters
replace state_fips="0"+state_fips if length(state_fips)==1
replace county_fips=cond(length(county_fips)==1, "00", "0")+county_fips if inlist(length(county_fips), 1, 2)

* merge key is the state code followed by the county code
gen fipscounty=state_fips+county_fips
merge m:1 fipscounty using cbsatocountycrosswalk, keepusing(msa)

* keep only rows found in both files, then remove the helper variables
drop if _merge!=3
drop _merge fipscounty county_fips state_fips

save cleaned_data, replace

}


********************************************************************************
* baseline treatment set up

{

* start from the cleaned panel
use cleaned_data, clear

* fe: group identifiers for state-by-year and county-by-state-by-year
egen state_year=group(statefips year)
egen county_year=group(county statefips year)

* make primary treatment variable to cut on
* drop ids with two or more non-missing AL_to_W values
bysort id: egen switch_count=count(AL_to_W)
drop if switch_count>=2
drop switch_count

* treatment year is the smallest AL_to_W value within the id
bysort id: egen treatment_year=min(AL_to_W)
gen ever_treated=!missing(treatment_year)

* make reverse treatment variables to cut on
bysort id: egen reverse_treatment_year=max(loose_w_al_year)
gen reverse_ever_treated=!missing(reverse_treatment_year)

* make mixed treatment variables to cut on
bysort id: egen wm_treatment_year=min(AL_to_Wor)
gen wm_ever_treated=!missing(wm_treatment_year)

* make treatment variable that includes misreports as switchers:
* the reported treatment year when there is one, otherwise
* loose_w_al_year minus 2
* NOTE(review): this uses loose_w_al_year, the same variable as the reverse
* treatment above; loose_al_w_year may have been intended - confirm
gen loose_treatment_year=cond(treatment_year!=., treatment_year, loose_w_al_year-2)
gen loose_ever_treated=!missing(loose_treatment_year)

* primary after treatment variable (0 when treatment_year is missing)
gen after_treatment=(year>treatment_year)

* treated rows more than 7 years past the treatment year
gen too_late=(ever_treated==1 & year>treatment_year+7)

* primary event study variables
* each indicator marks rows at a given distance from treatment_year:
*   ALW_n8  8 or more years before
*   ALW_n6  7 or 6 years before
*   ALW_n4  5 or 4 years before
*   ALW_0   1 year before or the treatment year itself
*   ALW_2   1 or 2 years after
*   ALW_4   3 or 4 years after
*   ALW_6   5 or 6 years after
*   ALW_7   7 or more years after
* rows 3 or 2 years before treatment get no indicator

gen ALW_n8 = (treatment_year!=. & year+8 <= treatment_year)

* pre-period two-year bins
forvalues j = 6(-2)4 {

	local i = `j'+1
	gen ALW_n`j'=(year+`i'==treatment_year | year+`j'==treatment_year)

}

* constant zero variable
gen ALW_zero=0

* two-year bins from one year before treatment onward
forvalues j = 0(2)6 {

	local i = `j'-1
	gen ALW_`j'=(year-`i'==treatment_year | year-`j'==treatment_year)

}

gen ALW_7 = (treatment_year!=. & year-7 >= treatment_year)

* several alternative treatments are created below

* at large to ward or mixed

* post-period indicator (0 when wm_treatment_year is missing) and a flag for
* treated rows more than 7 years past the treatment year
gen wm_after_treatment=(year>wm_treatment_year) 
gen wm_too_late=(wm_ever_treated==1 & year>wm_treatment_year+7)

* event study indicators by distance from wm_treatment_year:
*   n8: 8+ years before, n6: 7-6 before, n4: 5-4 before,
*   0: 1 before or treatment year, 2: 1-2 after, 4: 3-4 after,
*   6: 5-6 after, 7: 7+ years after
* rows 3 or 2 years before treatment get no indicator
gen ALM_n8 = (year+8 <= wm_treatment_year & wm_treatment_year!=.)

local j_list 6 4   
foreach j in `j_list'{

	local i = `j'+1
	gen ALM_n`j'=(wm_treatment_year==year+`i' |wm_treatment_year==year+`j'  )
	
}

gen ALM_zero=0

local j_list 0 2 4 6   
foreach j in `j_list'{

	* second year of the bin is j-1, as in the primary ALW bins; with j+1 the
	* last bin covered years 6 and 7 and overlapped ALM_7 (7 or more years)
	local i = `j'-1
	gen ALM_`j'=(wm_treatment_year==year-`i' |wm_treatment_year==year-`j'  )
	
}

gen ALM_7 = (year-7 >= wm_treatment_year & wm_treatment_year!=.)


* reverse event study variables

* post-period indicator (0 when reverse_treatment_year is missing) and a flag
* for treated rows more than 7 years past the treatment year
gen reverse_after_treatment=(year>reverse_treatment_year) 
gen reverse_too_late=(reverse_ever_treated==1 & year>reverse_treatment_year+7)

* event study indicators by distance from reverse_treatment_year:
*   n8: 8+ years before, n6: 7-6 before, n4: 5-4 before,
*   0: 1 before or treatment year, 2: 1-2 after, 4: 3-4 after,
*   6: 5-6 after, 7: 7+ years after
* rows 3 or 2 years before treatment get no indicator
gen WAL_n8 = (year+8 <= reverse_treatment_year & reverse_treatment_year!=.)


local j_list 6 4
foreach j in `j_list'{

	local i = `j'+1
	gen WAL_n`j'=(reverse_treatment_year==year+`i' |reverse_treatment_year==year+`j'  )
	
}

gen WAL_zero=0

local j_list 0 2 4 6
foreach j in `j_list'{

	* second year of the bin is j-1, as in the primary ALW bins; with j+1 the
	* last bin covered years 6 and 7 and overlapped WAL_7 (7 or more years)
	local i = `j'-1
	gen WAL_`j'=(reverse_treatment_year==year-`i' |reverse_treatment_year==year-`j'  )
	
}

gen WAL_7 = (year-7 >= reverse_treatment_year & reverse_treatment_year!=.)


* loose treatment event study variables

* post-period indicator (0 when loose_treatment_year is missing) and a flag
* for treated rows more than 7 years past the treatment year
gen loose_after_treatment=(year>loose_treatment_year) 
gen loose_too_late=(loose_ever_treated==1 & year>loose_treatment_year+7)

* event study indicators by distance from loose_treatment_year:
*   n8: 8+ years before, n6: 7-6 before, n4: 5-4 before,
*   0: 1 before or treatment year, 2: 1-2 after, 4: 3-4 after,
*   6: 5-6 after, 7: 7+ years after
* rows 3 or 2 years before treatment get no indicator
gen LS_n8 = (year+8 <= loose_treatment_year & loose_treatment_year!=.)

local j_list 6 4
foreach j in `j_list'{

	local i = `j'+1
	gen LS_n`j'=(loose_treatment_year==year+`i' |loose_treatment_year==year+`j'  )
	
}

gen LS_zero=0

local j_list 0 2 4 6
foreach j in `j_list'{

	* second year of the bin is j-1, as in the primary ALW bins; with j+1 the
	* last bin covered years 6 and 7 and overlapped LS_7 (7 or more years)
	local i = `j'-1
	gen LS_`j'=(loose_treatment_year==year-`i' |loose_treatment_year==year-`j'  )
	
}

gen LS_7 = (year-7 >= loose_treatment_year & loose_treatment_year!=.)

*** this is the primary analysis file
save analysis_file,replace

}

********************************************************************************
* construct matched sample and create treatment variables

{

* start again from the cleaned panel
use cleaned_data, clear

* make treatment variables to cut on:
* treatment year is the smallest AL_to_W value within the id
bysort id: egen treatment_year=min(AL_to_W)
gen ever_treated=!missing(treatment_year)

* drop the places that we don't want to include in the sample:
* ids with two or more non-missing AL_to_W values
bysort id: egen switch_count=count(AL_to_W)
drop if switch_count>=2
drop switch_count

* apply these flags in the build file for this sample so that we don't match to towns we later drop
* (they are applied while running regressions for baseline sample)
* rows with a missing treatment_year or a missing pop80 pass the first and
* third restrictions
keep if treatment_year>=1981
keep if ever_treated==1 | never_ward==1
keep if pop80>=2500

* get matches on 1980 characteristics: one row per place, the 1980 row
keep if year==1980

* nnmatch is not an official Stata command; the matches are written to the
* file named in keep()
nnmatch log_all ever_treated lpop80 pct_white80 pct_owned80  median_hh_income80 pc_all, exact(statefips) tc(att) m(10) keep(matches) replace

* clean the matches: keep one row per distinct pair of the two _0m
* variables and give them the names used in the main data
* NOTE(review): presumably the _0m variables hold the covariate values of
* the matched comparison rows - confirm against the nnmatch documentation
use matches, clear
keep lpop80_0m pct_white80_0m 
duplicates drop lpop80_0m pct_white80_0m , force
rename lpop80_0m lpop80
rename pct_white80_0m pct_white80
save stripped_matches, replace

* go back to main data set and merge in matches
* (rows are linked to the match list by their lpop80 and pct_white80 values)
use cleaned_data, clear
merge m:1 lpop80 pct_white80 using stripped_matches

* make treatment variables to cut on again
bysort id: egen treatment_year=min(AL_to_W)
gen ever_treated=!missing(treatment_year)

* keep ever treated and matched only 
keep if _merge==3 | ever_treated==1
drop _merge

* drop things that we don't want again:
* ids with two or more non-missing AL_to_W values
bysort id: egen switch_count=count(AL_to_W)
drop if switch_count>=2
drop switch_count

* rows with a missing treatment_year or a missing pop80 pass the first and
* third restrictions
keep if treatment_year>=1981
keep if ever_treated==1 | never_ward==1
keep if pop80>=2500

* create fe: group identifiers for state-by-year and county-by-state-by-year
egen state_year=group(statefips year)
egen county_year=group(county statefips year)

* make treated after variables (after_treatment is 0 when treatment_year is
* missing; too_late marks treated rows more than 7 years past treatment)
gen after_treatment=(year>treatment_year) 
gen too_late=(ever_treated==1 & year>treatment_year+7)

* main event study variables
* each indicator marks rows at a given distance from treatment_year;
* rows 3 or 2 years before treatment get no indicator

* 8 or more years before treatment
gen ALW_n8 = (treatment_year!=. & year+8 <= treatment_year)

* 7 or 6 years before, then 5 or 4 years before
gen ALW_n6=(treatment_year==year+7 | treatment_year==year+6)
gen ALW_n4=(treatment_year==year+5 | treatment_year==year+4)

* constant zero variable
gen ALW_zero=0

* 1 year before or the treatment year itself
gen ALW_0=(treatment_year==year+1 | treatment_year==year)

* 1 or 2, 3 or 4, 5 or 6 years after treatment
gen ALW_2=(treatment_year==year-1 | treatment_year==year-2)
gen ALW_4=(treatment_year==year-3 | treatment_year==year-4)
gen ALW_6=(treatment_year==year-5 | treatment_year==year-6)

* 7 or more years after treatment
gen ALW_7 = (treatment_year!=. & year-7 >= treatment_year)


*** this is the matched analysis file
save matched_analysis_file,replace

}

